* NOTE: if Stata does not do so automatically, cd to the top level directory of the data depository (where this script is stored)
use data\maindata.dta, clear
estimates clear


cap program drop stratFisher
program define stratFisher, rclass // this program performs a Fisher exact test on a 2x2 table when data are drawn from strata and thus should only be permuted within strata (see Jung 2004)
	syntax varname (numeric) [if], FACtor(varname) STRATifiedby(varlist) // "varname" is the outcome variable. It and the factor must be binary 0/1, with one being success and treated, respectively
	preserve
	qui {
		if "`if'"!="" keep `if'
		egen factor = group(`factor') // in case variable isn't coded 0/1
		replace factor = factor-1
		assert inlist(`varlist',0,1) & inlist(factor,0,1)
		count if `varlist' & factor
		scalar S = r(N)
		collapse (count) n=`varlist' (sum) z=`varlist' m=factor, by(`stratifiedby') // "(count) n=`varlist'" only gets the number of observations for each stratum
		gen j = _n // stratum identifier
		scalar J = _N // number of strata
		gen m_minus = max(0,z-n+m)
		gen m_plus = min(z,m)
		expand m_plus-m_minus+1
		bys j: gen i = m_minus+_n-1
		gen double f = hypergeometricp(n,z,m,i)
		recode f (.=1) // Stata generates missing values when there is only one possible allocation of successes
		keep j i f
		compress
		tempfile fulldata
		save `fulldata'
		forvalues j=2/`=J' {
			use `fulldata' if j==`j', clear
			drop j
			tempfile file`j'
			save `file`j''
			}
		use `fulldata' if j==1
		drop j
		rename (i f) (I f_)
		forvalues j=2/`=J' {
			cross using `file`j''
			replace I = I+i
			replace f_ = f_*f // f_ is product up to now; f is density of new strata's value
			collapse (sum) f_, by(I)
			}
		gen gr = (I>S)*f_
		gen eq = (I==S)*f_
		collapse (sum) gr eq f_
		assert float(f_)==1 // summing all probabilities must yield 1
		gen p = eq + min(gr,1-eq-gr)
	}
	return scalar p = min(1,2*p[1]) // 2*p could be >1 if I=S has large mass
	di "number of successes " S ", two-sided p = " return(p)
	restore
end

/***********************************************************************
		Precedent and Defendant
***********************************************************************/

**********************************
*** Figure 3 *********************

gen guilty2=guilty if def_nat=="unsympathetic":nationality
gen guilty3=guilty if def_nat=="sympathetic":nationality
graph dot guilty guilty2 guilty3, over(precedent, label(labsize(*.7))) over(country, gap(100)) ///
	marker(1, msymbol(O) mcolor(black)) marker(2, msymbol(dh) mcolor(blue) msize(*1.5)) marker(3, msymbol(sh) mcolor(orange_red) msize(*1.5)) ///
	legend(rows(1) order(- "Defendant:" 1 2 3) label(1 "both") label(2 "unsympathetic") label(3 "sympathetic") size(*.7)) ///
	linetype(line) lines(lwidth(*.5)) ///
	ytitle("Rate") title("Fig. 3: Affirmance Rates by Country (and Precedent and Defendant)", span size(*.9)) scheme(s1color)  ///
	saving(graphs\affirmance_rates_by_country, replace)

**********************************
*** statistical tests ************

* univariate
local factors def_nationality precedent 
foreach factor in `factors' {
	foreach r in "1" "canceled!=1" "misunderstanding!=1" "inrange(prec_mentioned_gen,2,4)" { // the first one ("1") is an always-true placeholder to make "if" syntanx work with unrestricted tests
		if "`r'"!="1" di _newline "restriction: `r'"
		tab guilty `factor' if `r', column nofreq exact nolog
		if "`factor'"=="def_nationality" {
			qui stratFisher guilty if `r', fac(`factor') strat(country `: list factors - factor')
			di %5.3f r(p) " - stratified Fisher exact p"
			}
		else {
			forvalues excprec=1/3 { // stratFisher can only deal with two levels of the factor so need to exclude one
				qui stratFisher guilty if `r' & precedent!=`excprec' , fac(`factor') strat(country `: list factors - factor')
				di %5.3f r(p) " - stratified Fisher exact p, excluded precedent " `excprec'
				}
			}
		}
	}

spearman guilty precedent // takes into account ordering of precedent

* multivariate
anova guilty def_nat precedent

clogit guilty i`="sympathetic":nationality'.def_nat i.precedent, group(country) or
	test 2.precedent 3.precedent // joint test of precedent
	lincom i1.def_nat-3.precedent, or // OR estimate and 95% c.i. for going from sympathetic/REVERSE to unsympathetic/Affirm, or OR(Affirm/REVERSE)/OR(Unsympathetic/Sympathetic) 

gen reverse=precedent==2 // exlogistic can't handle factor variables
gen REVERSE=precedent==3
gen sympathetic = def_nat=="sympathetic":nationality
exlogistic guilty sympathetic reverse REVERSE, group(country) memory(2g) nolog terms(precedent = reverse REVERSE)
	exlogistic, test(score) // this calculates the p-value for the joint test of terms() in the previous line

/***********************************************************************
		Anchor
***********************************************************************/

**********************************
*** Figure 4 *********************

graph hbox sentence if sentence<200, over(anchor, gap(*.3) label(labsize(*.7))) over(country) noouts ///
	intensity(0) medtype(cline) medline(lcolor(red) lwidth(*4)) cwhiskers lines(lpattern(dash)) ///
	marker(1, msymbol(o) mcolor(black) msize(*.5)) ///
	ytitle("Sentence (years)") title("Fig. 4: Sentences by Country (and Anchor)") ///
	note("Medians (red marker), 25th and 75th percentiles (box), and adjacent values (whiskers)." ///
	"Five U.S. outlier sentences (11019, 1540, 1040, 210, and 209 years) omitted.") ///
	scheme(s1color) saving(graphs\sentences_by_anchor_country, replace)

**********************************
*** statistical tests ************

* univariate
spearman sentence anchor
spearman sentence anchor if sentence<200
pwcorr sentence anchor, sig
pwcorr sentence anchor if sentence<200, sig
bys country: spearman sentence anchor if sentence<200

* regressions (--> T. S.2)
gen ln_sentence = ln(sentence)
xtset country
foreach anchorvar in anchor i.anchor {
	estimates drop _all
	_eststo: xtreg sentence i`="sympathetic":nationality'.def_nat i.precedent `anchorvar' if sentence<200, fe robust
		test 2.precedent 3.precedent
		estadd scalar joint_p = r(p)
	_eststo: qreg sentence i`="sympathetic":nationality'.def_nat i.precedent `anchorvar' i.country
		test 2.precedent 3.precedent
		estadd scalar joint_p = r(p)
	_eststo: xtreg ln_sentence i`="sympathetic":nationality'.def_nat i.precedent `anchorvar', fe robust
		test 2.precedent 3.precedent
		estadd scalar joint_p = r(p)
	esttab using tables\regressions_`anchorvar'.csv, ///
		drop(?.country _cons) order(anchor) noconstant nobaselevels ///
		scalars(joint_p) ci obslast b(%4.2f) ci(%4.2f) transform(anchor @*15 15) nostar label replace // note that anchor is recorded per 15 years
	}